ISSS608 Visual Analytics & Applications Coursework
by Wilson Tan
  • Hands-on Exercises
    • Week 1: Hands-on Exercise
    • Week 2: Hands-on Exercise
    • Week 3: Hands-on Exercise
    • Week 4: Hands-on Exercise
  • In-class Exercises
    • Week 1: In-class Exercise
    • Week 2: In-class Exercise
    • Week 3: In-class Exercise
    • Week 4: In-class Exercise
    • Week 5: In-class Exercise
    • Week 6: In-class Exercise
    • Week 7: In-class Exercise
  • Take-home Exercises
    • Take-home Exercise 01
    • Take-home Exercise 02

Take-home Exercise 02

json_file_path <- "data/mc2_challenge_graph.json"
mc2_file_path <- "data/mc2.rds"

if (!file.exists(mc2_file_path)) {
  mc2 <- fromJSON(json_file_path)
  saveRDS(mc2, mc2_file_path)
} else {
  mc2 <- readRDS(mc2_file_path)
}
mc2_nodes <- as_tibble(mc2$nodes) %>%
  select(id, shpcountry, rcvcountry)

mc2_edges <- as_tibble(mc2$links) %>%
  mutate(ArrivalDate = ymd(arrivaldate)) %>%
  mutate(Year = year(ArrivalDate)) %>%
  select(
    source,
    target,
    ArrivalDate,
    Year,
    hscode,
    valueofgoods_omu,
    volumeteu,
    weightkg,
    valueofgoodsusd
  ) %>%
  distinct()

mc2_edges$grp_hscode <- substr(mc2_edges$hscode, 1, 1)
ids <- union(unique(mc2_edges$source),
            unique(mc2_edges$target)) %>% sort() %>% as_tibble()
colnames(ids) <- "name"
ids <- ids %>% mutate(cid = row_number())
ids$cid <- factor(ids$cid)
mc2_edges_agg <- mc2_edges %>%
  group_by(source, target, grp_hscode, Year) %>%
  summarise(num_trades = n(),
            total_weightkg = sum(weightkg)) %>%
  filter(source != target) %>%
  filter(num_trades > 20) %>%
  ungroup()
id1 <- mc2_edges_agg %>%
  select(source) %>%
  rename(id = source)
id2 <- mc2_edges_agg %>%
  select(target) %>%
  rename(id = target)
mc2_nodes_extracted <- rbind(id1, id2) %>%
  distinct()
mc2_nodes_extracted <- merge(mc2_nodes_extracted,
                             ids,
                             by.x = "id",
                             by.y = "name")
rm(id1, id2)
mc2_graph <- tbl_graph(nodes = mc2_nodes_extracted,
                       edges = mc2_edges_agg,
                       directed = TRUE) %>%
  activate(nodes) %>%
  mutate(betweenness_centrality = centrality_betweenness(weights = num_trades)) %>%
  mutate(outdegree_centrality = centrality_degree(weights = num_trades,
                                                  mode = "out"))
years = c("2028")

for (y in years) {
  mygraph <- paste("mc2", "graph", y, sep = "_")
  assign(
    mygraph,
    mc2_graph %>%
      activate(edges) %>%
      filter(Year == y) %>%
      activate(nodes) %>%
      filter(!node_is_isolated()) %>%
      mutate(betweenness_centrality = centrality_betweenness(weights = num_trades)) %>%
      mutate(outdegree_centrality = centrality_degree(weights = num_trades,
                                                      mode = "out"))
  )

  assign(
    paste("g", y, sep = "_"),
    ggraph(get(mygraph),
           layout = "nicely") +
      geom_edge_link(aes(width = num_trades,
                         color = grp_hscode),
                     alpha = 0.6) +
      scale_edge_width(range = c(0.4, 4), name = "Total weight") +
      scale_edge_color_brewer(name = "HS code group",
                              palette = "Set1") +
      geom_point_interactive(
        aes(
          x = x,
          y = y,
          tooltip = paste0(
            "Name:  ", id,
            "\nCompany ID:  ", cid,
            "\nOut-degree:  ", outdegree_centrality,
            "\nBetweenness:  ", betweenness_centrality
          ),
          data_id = outdegree_centrality > 0,
          size = betweenness_centrality,
          fill = outdegree_centrality > 0
        ),
        colour = "grey20",
        shape = 21,
        alpha = 0.8
      ) +
      scale_fill_manual(labels = c("Zero", "Non-zero"), values = c("cyan", "firebrick1"), name = "Out-degree") +
      scale_size_continuous(range = (c(1, 10)), name = "Betweenness") +
      theme_graph(
        foreground = "grey20",
      ) +
      labs(title = y) +
      theme(plot.title = element_text(size = 11))
  )
}

rm(y, years, mygraph)

Network graph for year 2028

cat("There are", length(unique(
  c(mc2_edges_agg$source, mc2_edges_agg$target)
)), "companies in the dataset.", "\n")

suspects <- mc2_graph %>% activate(nodes) %>% data.frame() %>% tibble() %>%
  filter(betweenness_centrality == 0) %>%
  filter(outdegree_centrality > 0)

companies_closed_down_1 <- mc2_edges_agg %>%
  group_by(source) %>%
  summarise(year_of_closure = max(Year))
companies_closed_down_2 <- mc2_edges_agg %>%
  group_by(target) %>%
  summarise(year_of_closure = max(Year))

companies_closed_down <-
  merge(
    companies_closed_down_1,
    companies_closed_down_2,
    by.x = "source",
    by.y = "target",
    all = TRUE
  )
companies_closed_down$year_of_closure <-
  pmax(
    companies_closed_down$year_of_closure.x,
    companies_closed_down$year_of_closure.y,
    na.rm = TRUE
  )
suspects <- companies_closed_down %>%
  select(name = source, year_of_closure) %>%
  filter(year_of_closure < max(mc2_edges_agg$Year)) %>%
  filter(name %in% suspects$id)

cat(
  nrow(suspects),
  "companies with zero betweenness centrality and non-zero out-degree centrality have closed down prematurely.",
  "\n"
)
cat("These companies are:", "\n")

datatable(suspects)
rm(companies_closed_down_1, companies_closed_down_2, companies_closed_down)
There are 6299 companies in the dataset. 
1545 companies with zero betweenness centrality and non-zero out-degree centrality have closed down prematurely. 
These companies are: 
mc2_graph <- mc2_graph %>%
  activate(nodes) %>%
  mutate(suspicious = ifelse(id %in% suspects$name, "Yes", "No"))
years = c("2028", "2031", "2034")

for (y in years) {
  mygraph <- paste("mc2", "graph", y, sep = "_")
  assign(
    mygraph,
    mc2_graph %>%
      activate(edges) %>%
      filter(Year == y) %>%
      activate(nodes) %>%
      filter(!node_is_isolated()) %>%
      mutate(betweenness_centrality = centrality_betweenness(weights = num_trades)) %>%
      mutate(outdegree_centrality = centrality_degree(weights = num_trades,
                                                      mode = "out"))
  )

  assign(
    paste("g", y, sep = "_"),
    ggraph(get(mygraph),
           layout = "nicely") +
      geom_edge_link(aes(width = num_trades,
                         color = grp_hscode),
                     alpha = 0.6) +
      scale_edge_width(range = c(0.4, 4), name = "Total weight") +
      scale_edge_color_brewer(name = "HS code group",
                              palette = "Set1") +
      geom_point_interactive(
        aes(
          x = x,
          y = y,
          tooltip = paste0(
            "Name:  ", id,
            "\nCompany ID:  ", cid,
            "\nOut-degree:  ", outdegree_centrality,
            "\nBetweenness:  ", betweenness_centrality,
            "\nSuspicious?:  ", suspicious
          ),
          data_id = suspicious,
          size = betweenness_centrality,
          fill = suspicious
        ),
        colour = "grey20",
        shape = 21,
        alpha = 0.8
      ) +
      scale_fill_manual(values = c("cyan", "firebrick1"), name = "Suspicious?") +
      scale_size_continuous(range = (c(1, 10)), name = "Betweenness") +
      theme_graph(
        foreground = "grey20",
      ) +
      labs(title = y) +
      theme(plot.title = element_text(size = 11))
  )
}

rm(y, years, mygraph)

Tabbed by years

  • 2028
  • 2031
  • 2034